*Computation of alternative ESEC classification, based on GESIS files.

*Tim Goedemé, 16/07/2019

*Globals
********
* place1: root folder of the cross-sectional EU-SILC files. The main loop below
* appends country, year and file name to it to locate each input file and to
* write each output file. Note that the value ends in a backslash.
global place1 ...\EU-SILC\Cross\2020-04\
* countries: list of two-letter country codes that the main loop iterates over.
global countries AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK


* Inspect how pl051 is coded in DE, MT and SI: for each of the years 2011-2018
* load the country-year file and tabulate year and pl051.

foreach cc in DE MT SI {
	display "******************`cc'**********************"
	foreach yr of numlist 2011/2018 {
		display "`cc': `yr'"
		use "A:\Stata files\Oxfiles\EU-SILC\Cross\2020-04\\`cc'\\`yr'\c`cc'`yr'_all.dta" , clear
		tabulate year
		tabulate pl051
	}
}

* Same inspection for MT only: tabulate year and pl050 for each of the years 2007-2011.
foreach cc in MT {
	display "******************`cc'**********************"
	foreach yr of numlist 2007/2011 {
		display "`cc': `yr'"
		use "A:\Stata files\Oxfiles\EU-SILC\Cross\2020-04\\`cc'\\`yr'\c`cc'`yr'_all.dta" , clear
		tabulate year
		tabulate pl050
	}
}


*Construction of social class variables
***************************************
*3 class structure, completely in accordance with Joan's notes, unless mentioned otherwise
***Original sources: 
**********https://www.gesis.org/fileadmin/upload/dienstleistung/daten/amtl_mikrodaten/europ_microdata/EU-SILC/Tools/ESeC_ESeG_Tools/ESeC/Stata/2014_esec08.do
**********https://www.gesis.org/fileadmin/upload/dienstleistung/daten/amtl_mikrodaten/europ_microdata/EU-SILC/Tools/ESeC_ESeG_Tools/ESeC/Stata/2005_esec88.do

*** Biggest changes to GESIS code: use PL040 to determine whether or not we look at size of the company + inclusion of family workers

* Main loop. For every country in the global countries and every year 2004-2018
* for which an input file c<country><year>_all.dta exists under place1:
*   - build ESEC88 from PL050 (ISCO-88) and ESEC08 from PL051 (ISCO-08), each set
*     to missing when the source variable is not in the file;
*   - keep the identifiers, weights and the generated esec and emp variables;
*   - save the result in the same folder as c<country><year>_ESEC.dta.
foreach ctry of global countries {
	di "**************************************************************"
	di "                        `ctry'                                "
	di "**************************************************************"

	forvalues year=2004/2018 {
		di "`ctry': `year'"

		* Common stem of the input and output file names. Forward slashes are used
		* in front of every macro reference, because a backslash placed directly
		* before a macro reference tells Stata not to expand that macro.
		local stem "${place1}/`ctry'/`year'/c`ctry'`year'"

		cap confirm file "`stem'_all.dta"
		if _rc==0 {
			use "`stem'_all.dta", clear

			* Work with upper-case variable names, except for country and year
			rename _all, upper
			rename (COUNTRY YEAR) (country year)

			* Value label shared by empstat88 and empstat08. It is defined here, before
			* the two branches, so that empstat08 is also labelled in files without PL050.
			label define empstat_lbl 1 "se10+" 2 "se<=10" 3 "sup" 4 "emp", replace

			** Split according to the number of persons working at the local unit
			* adequate variable in EU-SILC is PL130
			* generating new variable "emplno", used by both classifications below:
			* PL130 codes 1-9 and 14 give emplno 1; codes 10-13 and 15 give emplno 2
			* separate treatment for differently coded MT variable; also NL and DK (2014) variables have fewer categories, but probably not recoded in problematic way
			cap drop emplno
			gen emplno=.
			replace emplno=1 if PL130<=9 | PL130==14
			replace emplno=2 if inrange(PL130, 10, 13) | PL130==15

			*MT
			replace emplno=1 if inlist(PL130, 1, 2, 5) & country=="MT"
			replace emplno=2 if inlist(PL130, 3, 4, 6) & country=="MT"

			*5. ISCO-88 (PL050)
			********************
			cap drop ESEC88
			* exact: do not accept another variable of which PL050 is merely an abbreviation
			cap confirm variable PL050, exact

			if _rc==0 {

				**1. employees vs. self-employed; family workers as employees

				cap drop empl88
				gen empl88 = (PL040==1 | PL040==2) if !missing(PL040) // 1 = self-employed; 0 = employee

				**2. Size of the local unit: see emplno, generated above

				**3. Next differentiation is based on the managerial position of employees
				* adequate variable in EU-SILC is PL150: 1 = supervisory; 2 = non-supervisory

				**4. now combine the basic employment situation with the number of persons at the local
				* unit and the managerial position
				* generating new variable "empstat"
				* the rules are applied in order, so PL040==4 ends up in category 4 even when PL150==1

				cap drop empstat88
				gen empstat88=.
				replace empstat88=1 if (empl88==1 & emplno==2 & PL040==1) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat88=2 if (empl88==1 & emplno==1) | PL040==2 // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat88=3 if (empl88==0 & PL150==1)
				replace empstat88=4 if (empl88==0 & PL150==2) | PL040==4 // ADDITION PL040 condition IS WHAT IS NEW in this approach
				label values empstat88 empstat_lbl

				/* cap drop empstat88b
				gen empstat88b=.
				replace empstat88b=1 if (empl88==1 & emplno==2 & PL040==1) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat88b=2 if (empl88==1 & emplno==1) | PL040==2 // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat88b=3 if (empl88==0 & PL150==1)  | (PL040==4 & ((PL130>=10 & PL130<=13) | PL130==15)) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat88b=4 if (empl88==0 & PL150==2) 	| (PL040==4 & (PL130<10 | PL130==14)) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				*label define empstat_lbl 1 "se10+" 2 "se<=10" 3 "sup" 4 "emp"
				label values empstat88b empstat_lbl

				cap drop empstat08b
				gen empstat08b=.
				replace empstat08b=1 if (empl08==1 & emplno==2 & PL040==1) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat08b=2 if (empl08==1 & emplno==1) | PL040==2 // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat08b=3 if (empl08==0 & PL150==1)  | (PL040==4 & ((PL130>=10 & PL130<=13) | PL130==15)) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat08b=4 if (empl08==0 & PL150==2)  | (PL040==4 & (PL130<10 | PL130==14)) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				label values empstat08b empstat_lbl
				*/

				* Start from the occupation code; RO 2010 takes it from PL051 instead of PL050
				gen ESEC88=PL050
				if "`ctry'"=="RO" & `year'==2010 {
					replace ESEC88=PL051
					replace ESEC88=. if PL051==25 // newly added
				}

				*PT: PL050 categories 11,12=13 => 10 (Major group)
				recode ESEC88 13 =10 if country == "PT"   // only Portugal

				*MT: PL050 grouped => only Major groups, i.e. 1-digit
				* codes 1-9 become 10-90 and code 10 becomes 1
				recode ESEC88 ///
				1 =10 ///
				2 =20 ///
				3 =30 ///
				4 =40 ///
				5 =50 ///
				6 =60 ///
				7 =70 ///
				8 =80 ///
				9 =90 ///
				10 =1 ///
				if (country == "MT" & year>2007)


				***blocks of syntax for each employment status variable***
				*self-employed 10+
				recode ESEC88 ///
				1 10 11 12 13 21 22 23 24 31 32 33 34 41 42 51 52 61 71 72 73 74 81 82 83 91 92 93 20 30 40 50 60 70 80 90  =1 ///
				if empstat88 ==1 // note that 20, 30, 40, 50, 60, 70, 80, 90 do not exist in the original data.


				* self-employed <=10
				recode ESEC88 ///
				1 11 20 21 22 24  =1 ///
				23 31 32 =2 ///
				10 12 13 30 33 34 40 41 42 50 51 52 70 71 72 73 74 80 81 82 83 90 91 93 =4  ///
				60 61 92 =5 ///
				if empstat88 ==2

				// gen ESEC88b = ESEC88

				* supervisors
				recode ESEC88 ///
				1 10 11 12 20 21 22 24  =1 ///
				13 23 30 31 32 33 34 40 41 42 =2 ///
				50 51 52 60 61 70 71 72 73 74 80 81 82 83 90 91 92 93 =6 ///
				if empstat88==3

				/* recode ESEC88b /// 
				1 10 11 12 20 21 22 24  =1 ///
				13 23 30 31 32 33 34 40 41 42 =2 ///
				50 51 52 60 61 70 71 72 73 74 80 81 82 83 90 91 92 93 =6 ///
				if empstat88b==3 */

				* employees
				recode ESEC88 ///
				1 10 11 12 20 21 22 24 =1 ///
				13 23 31 32 =2 ///
				30 33 34 40 41 42  =3 ///
				73 =6 ///
				50 51 52 =7 ///
				60 61 70 71 72 74 =8 ///
				80 81 82 83 90 91 92 93 =9 ///
				if empstat88==4

				/* recode ESEC88b /// 
				1 10 11 12 20 21 22 24 =1 /// 
				13 23 31 32 =2 /// 
				30 33 34 40 41 42  =3 /// 
				73 =6 /// 
				50 51 52 =7 /// 
				60 61 70 71 72 74 =8 /// 
				80 81 82 83 90 91 92 93 =9 /// 
				if empstat88b==4 */


				lab def ESEC88 ///
				1 "Large employers, higher mgrs/professionals" ///
				2 "Lower mgrs/professionals, higher supervisory/technicians" ///
				3 "Intermediate occupations" ///
				4 "Small employers and self-employed (non-agriculture)" ///
				5 "Small employers and self-employed (agriculture)" ///
				6 "Lower supervisors and technicians" ///
				7 "Lower sales and service"  ///
				8 "Lower technical" ///
				9 "Routine", modify
				lab value ESEC88 ESEC88
				*ta ESEC88 year, m
				*ta ESEC88 year if PX040!=3, m

				/* lab def ESEC88b ///
				1 "Large employers, higher mgrs/professionals" ///
				2 "Lower mgrs/professionals, higher supervisory/technicians" ///
				3 "Intermediate occupations" ///
				4 "Small employers and self-employed (non-agriculture)" ///
				5 "Small employers and self-employed (agriculture)" ///
				6 "Lower supervisors and technicians" ///
				7 "Lower sales and service"  ///
				8 "Lower technical" ///
				9 "Routine", modify  
				lab value ESEC88 ESEC88
				ta ESEC88b year, m
				ta ESEC88b year if PX040!=3, m */

				* not all variables needed for generating ESEC are available for all respondents. This is especially
				* true for countries using the concept of "selected respondents"  (DK, FI, IS, NL, NO, SE, SI). In these countries
				* the variables PL130 (size ...) and PL150 (managerial status) are available only for selected respondents.
				* That is, in these countries ESEC can not be generated for PX040 = 3 (not selected respondent) because important
				* information is missing

				* ESEC for 'not selected respondent' is set to missing value *
				replace ESEC88=. if PX040==3
				*replace ESEC88b=. if PX040==3

				* ESEC is set to missing when the combined employment status or the size of the unit is not available *
				* (this also clears occupation codes that were left unrecoded because empstat88 is missing)
				replace ESEC88=. if missing(empstat88) | missing(PL130)
				// replace ESEC88b=. if empstat88b==. | PL130==.

				/* ta ESEC88 year, m
				ta ESEC88b year, m

				ta ESEC88 ESEC88b, m
				ta ESEC88 PL040
				ta ESEC88b PL040

				ta ESEC88 ESEC88b if PL040==4, m
				gen test=ESEC88!=ESEC88b
				ta test [iw=rb050] if PL040==4 // only 1 per cent of family workers change social class.
				ta test if PL040==4 */
			}
			else {
				gen ESEC88=.
			}

			**6. ISCO-08
			************
			cap drop ESEC08
			* exact: do not accept another variable of which PL051 is merely an abbreviation
			cap confirm variable PL051, exact

			if _rc==0 {

				**1. employees vs. self-employed; family workers as employees

				cap drop empl08
				gen empl08 = (PL040==1 | PL040==2) if !missing(PL040) // 1 = self-employed; 0 = employee
				* the next two lines are order-dependent: PL031<3 overrides the PL031<5 assignment
				replace empl08=1 if PL031<5 & missing(PL040) // self-employed  || condition added that PL040 should be missing, otherwise more changes (this is different from Joan's code, but in line with GESIS code)
				replace empl08=0 if PL031<3 & missing(PL040) // employees		|| condition added that PL040 should be missing, otherwise more changes (this is different from Joan's code, but in line with GESIS code)

				**2. Size of the local unit: see emplno, generated above

				**3. Next differentiation is based on the managerial position of employees
				* adequate variable in EU-SILC is PL150: 1 = supervisory; 2 = non-supervisory

				**4. now combine the basic employment situation with the number of persons at the local
				* unit and the managerial position
				* generating new variable "empstat"

				cap drop empstat08
				gen empstat08=.
				replace empstat08=1 if (empl08==1 & emplno==2 & PL040==1) // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat08=2 if (empl08==1 & emplno==1) | PL040==2 // ADDITION PL040 condition IS WHAT IS NEW in this approach
				replace empstat08=3 if (empl08==0 & PL150==1)
				replace empstat08=4 if (empl08==0 & PL150==2)  | PL040==4 // ADDITION PL040 condition IS WHAT IS NEW in this approach
				label values empstat08 empstat_lbl

				* (the commented-out alternative empstat88b / empstat08b definitions are kept
				* once, in the ISCO-88 branch above)

				gen ESEC08=PL051
				replace ESEC08=. if country=="RO" & year==2010

				**Treatment of countries with non-perfect PL051: DE (since 2015); IE; MT; SI (since 2015); SK (until 2014, incl.)
				* NOTE(review): the SI recode below applies from 2014 onwards, not 2015 - confirm which year is intended

				*MT and DE (since 2015): PL051 grouped => only Major groups, i.e. 1-digit
				*tab PL051 if PB020 =="MT"
				recode ESEC08 ///
				1 =10 ///
				2 =20 ///
				3 =30 ///
				4 =40 ///
				5 =50 ///
				6 =60 ///
				7 =70 ///
				8 =80 ///
				9 =90 ///
				10 =1 ///
				if country == "MT" | (country=="DE" & year>=2015)  // only Malta and Germany since 2015 (added by TG)

				*SI: same regrouping, except that code 0 (instead of 10) becomes 1
				*tab PL051 if PB020 =="SI"
				recode ESEC08 ///
				1 =10 ///
				2 =20 ///
				3 =30 ///
				4 =40 ///
				5 =50 ///
				6 =60 ///
				7 =70 ///
				8 =80 ///
				9 =90 ///
				0 =1 ///
				if country == "SI" & year>=2014  // only Slovenia
				replace ESEC08 = . if ESEC08==0 // 16 cases in IE in 2012; line added by TG.


				*PT: PL051 categories 11,12,13,14=14 => 10 (Major group)
				recode ESEC08 ///
				11 12 13 14 =10 ///
				if country == "PT"   // only Portugal (TG: expanded to cover also 11, 12, and 13 categories)


				***blocks of syntax for each employment status variable***
				*self-employed 10+
				recode ESEC08 ///
				1 10 11 12 13 14 20 21 22 23 24 25 26 30 31 32 33 34 35 40 41 42 ///
				43 44 50 51 52 53 60 61 62 70 71 72 73 74 75 80 81 82 83 90 ///
				91 92 93 94 95 96 =1 ///
				2 3 54 =3 ///
				63 =5 ///
				if empstat08 ==1


				* self-employed <=10
				recode ESEC08 ///
				1 11 20 21 24 26 =1 ///
				22 23 25 31 32 33 =2 ///
				2 3 54 =3 ///
				10 12 13 14 30 34 35 40 41 42 43 44 50 51 52 53 70 71 72 73 74 75 80 81 82 83 90 91 94 95 96 =4  ///
				60 61 62 63 92 93 =5 ///
				if empstat08 ==2


				* supervisors
				recode ESEC08 ///
				1 10 11 12 13 20 21 24 26 =1 ///
				2 3 14 22 23 25 30 31 32 33 34 35 40 41 43 =2 ///
				42 44 50 51 52 53 54 60 61 62 63 70 71 72 73 74 75 80 81 82 83 90 91 92 93 94 95 96 =6 ///
				if empstat08==3


				* employees
				recode ESEC08 ///
				1 10 11 12 20 21 24 26 =1 ///
				13 14 22 23 25 31 32 33 =2 ///
				2 3 30 34 35 40 41 43 44 =3 ///
				63 =5 ///
				42 50 51 52 53 54 =7 ///
				60 61 62 70 71 72 73 74 75 =8 ///
				80 81 82 83 90 91 92 93 94 95 96 =9 ///
				if empstat08==4



				*not all variables needed for generating ESEC are available for all respondents. This is especially
				*true for countries using the concept of "selected respondents"  (DK, FI, IS, NL, NO, SE, SI). In these countries
				*the variables PL130 (size ...) and PL150 (managerial status) are available only for selected respondents.
				*That is, in these countries ESEC can not be generated for PX040 = 3 (not selected respondent) because important
				*information is missing

				* tab PB020 PX040,m
				* tab PL130 PX040,m
				* tab PL150 PX040,m
				* tab ESEC08 PX040, m

				*ESEC for 'not selected respondent' is set to missing value *
				replace ESEC08=. if PX040==3


				*Moreover, because PL130 is not asked for respondents currently not working, ESeC can not be assigned to these respondents
				replace ESEC08=. if missing(PL130) | missing(empstat08)

				* NOTE(review): numlabel runs before the ESEC08 label exists, so it prefixes the
				* labels already in memory (e.g. ESEC88, empstat_lbl) but not ESEC08, and it only
				* runs for files that contain PL051 - confirm that this is intended
				numlabel, add
				tab ESEC08 year,m

				*tab PB020 ESEC08, nof row
				*tab ESEC08 empstat, m

				lab def ESEC08 ///
				1 "Large employers, higher mgrs/professionals" ///
				2 "Lower mgrs/professionals, higher supervisory/technicians" ///
				3 "Intermediate occupations" ///
				4 "Small employers and self-employed (non-agriculture)" ///
				5 "Small employers and self-employed (agriculture)" ///
				6 "Lower supervisors and technicians" ///
				7 "Lower sales and service"  ///
				8 "Lower technical" ///
				9 "Routine", modify
				lab value ESEC08 ESEC08
			}
			else {
				gen ESEC08=.
			}

			*Save file: back to lower-case names, keep identifiers, weights and generated variables
			rename _all, lower
			keep country year psu1 strata1 rb050 pb040 hid pid pl040 px040 rb080 esec* emp*
			compress

			save "`stem'_ESEC.dta", replace

		}
	}
}





